Libraries

library(tidyverse)
library(naniar)
library(visdat)
library(lubridate)
library(ggridges)
library(htmltools)
library(readr)
library(forcats)

# install.packages("devtools")
#devtools::install_github("dill/emoGG")
library(emoGG)
library(leaflet) # leaflet map

Data

Reading in and exploring the data.

# Longline data set, stored as an Excel workbook.
longline <- readxl::read_xlsx(path = "data/longline97-2008.xlsx")

# Palangre data sets for the two periods, stored as CSV files.
palangre87_97 <- read_csv(file = "data/palangre_87_97 - palangre_87_97.csv")
palangre97_18 <- read_csv(file = "data/palangre_97_18 - palangre_97_18.csv")

longline97-2008.xlsx

Data Dictionary:

Variable Description
identifier Unique Vessel Identifier
vessel_code Vessel Code
vessel_code2 Vessel Code (taking into account company?)
vessel_name Name of Vessel
gross_tonnage Gross tonnage of the vessel
vessel_length Length of the Vessel (m)
vessel_width Vessel Width
stanchion Stanchions on the vessel
primary_engine_hp Horsepower of the primary engine
second_engine_hp Horsepower of the second engine
carrying_capacity The carrying capacity of the vessel (units?)
year_constructed The year the vessel was constructed
vessel_criteria The criteria for the vessel
vessel_classification The classification of the vessel

palangre_87_97 - palangre_87_97.csv

The data has 35030 observations and 93 variables.

Data Dictionary:

Variable Description
CBU_ORAC Vessel Code: 76 vessels
crucero Trip? Range: 1-55
pesqueria Fishery
fecha Date
Dia Day
Mes Month
Year Year
lance Haul
latcalini Latitude
loncalini Longitude
aquas Waters (management zone?)
zmin Minimum depth (m)
zmax Maximum depth (m)
nanzuelo Number of hooks (anzuelos)
lonlmad Mid longitude
Total.de.peso Total weight (kg)
X0 - X999 Fish Species (see reference table)

palangre_97_18 - palangre_97_18.csv

The data has 62178 observations and 134 variables.

Data Dictionary:

Variable Description
CBU_ORAC Vessel Code: 76 vessels
crucero Trip? Range: 1-55
pesqueria Fishery
fecha Date
Dia Day
Mes Month
Year Year
lance Haul
latcalini Latitude
loncalini Longitude
latcalini_new Latitude (new)
loncalini_new Longitude (new)
aquas Waters (management zone?)
zmin Minimum depth (m)
zmax Maximum depth (m)
nanzuelo Number of hooks (anzuelos)
zmin_new Minimum depth (m) new
zmax_new Maximum depth (m) new
lonlmad Mid longitude
Total.de.peso Total weight (kg)
X0 - X999 Fish Species (see reference table)

Part I: Longline Data

Exploring the vessels

  1. How many vessels are in the data set? What is the vessel code?

Vessel Code

# One row per distinct vessel code.
distinct(longline, vessel_code)
## # A tibble: 10 × 1
##    vessel_code
##          <dbl>
##  1      400062
##  2      400065
##  3      400075
##  4      400082
##  5      400088
##  6      400118
##  7      400061
##  8      400081
##  9      400133
## 10      400512

Vessel Code2

# One row per distinct secondary vessel code.
distinct(longline, vessel_code2)
## # A tibble: 10 × 1
##    vessel_code2
##           <dbl>
##  1       400062
##  2       400065
##  3       400075
##  4       400082
##  5       400088
##  6       400118
##  7       400061
##  8       400081
##  9       400133
## 10           NA
  1. How complete is the vessel data?
# Missingness overview of the vessel-description columns.
# The third column is the vessel name; selecting `vessel_code` a second time
# there was a no-op and left the name out of the completeness check.
longline %>%
  select(vessel_code,
         vessel_code2,
         vessel_name,
         gross_tonnage,
         vessel_length,
         vessel_width,
         primary_engine_hp,
         second_engine_hp,
         carrying_capacity) %>%
  visdat::vis_miss()

  1. What are their dimensions (length, width, gross tonnage) and carrying capacity? What is the relationship between these variables?
# Vessel width against length, one point per distinct vessel record, with
# point size mapped to gross tonnage and each point labelled by vessel code.
longline %>%
  group_by(vessel_code) %>%
  distinct(vessel_code, vessel_length, vessel_width, gross_tonnage, carrying_capacity) %>%
  ggplot(mapping = aes(x = vessel_width, y = vessel_length, label = vessel_code)) +
  geom_point(mapping = aes(size = gross_tonnage)) +
  # Labels sit just to the right of their point; overlapping labels are dropped.
  geom_text(size = 3, hjust = 0, nudge_x = 0.1, check_overlap = TRUE) +
  xlim(8, 10.5) +
  labs(size = "Gross Tonnage (tonnes)",
       x = "Vessel Width (m)",
       y = "Vessel Length (m)")

# Same width/length layout, drawn with an emoji glyph whose size is taken
# directly (via I()) from carrying capacity divided by 6000.
longline %>%
  group_by(vessel_code) %>%
  distinct(vessel_code, vessel_length, vessel_width, gross_tonnage, carrying_capacity) %>%
  mutate(height = carrying_capacity, width = carrying_capacity) %>%
  ggplot(mapping = aes(x = vessel_width,
                       y = vessel_length,
                       label = vessel_code,
                       size = I(carrying_capacity/6000))) +
  geom_emoji(emoji = "26f4", show.legend = TRUE) +
  geom_text(size = 3, hjust = 0, nudge_x = 0.1, check_overlap = TRUE) +
  xlim(8, 10.5) +
  labs(size = "Carrying Capacity",
       x = "Vessel Width (m)",
       y = "Vessel Length (m)")

Observations:

  • One of the ships has a small carrying capacity for its size. One of the ships is much smaller than the others: 400082.

  • There is also one vessel for which we are missing a vessel code.

  • Vessel 400088 has only 2 entries.

  1. What is their vessel classification?
# Missingness overview of the size, classification and construction columns.
longline %>%
  select(vessel_code, gross_tonnage, vessel_length, vessel_width,
         vessel_classification, year_constructed) %>%
  visdat::vis_miss()

# Distinct values recorded for the vessel classification.
unique(longline[["vessel_classification"]])
## [1] "NA"     NA       "casual"

Observations:

  • vessel_classification doesn’t give us any information.
# Distinct vessel records (dimensions, capacity, construction year), kept
# grouped by vessel code.
distinct(group_by(longline, vessel_code),
         vessel_code, vessel_length, vessel_width, gross_tonnage,
         carrying_capacity, year_constructed)
## # A tibble: 10 × 6
## # Groups:   vessel_code [10]
##    vessel_code gross_tonnage vessel_length vessel_width carrying_capacity
##          <dbl>         <dbl>         <dbl>        <dbl>             <dbl>
##  1      400062          505.          44.5         8.4               350 
##  2      400065          753           53.4         9.5               200 
##  3      400075          537           46.8         8.5               571 
##  4      400082          292           26.6         8                  72 
##  5      400088          653.          48.5         9.2               364.
##  6      400118          752           52.2         9.5               550 
##  7      400061          465.          46.3         8.35              350 
##  8      400081          292           26.6         8                  72 
##  9      400133           NA           NA          NA                  NA 
## 10      400512           NA           NA          NA                  NA 
## # … with 1 more variable: year_constructed <dbl>
  1. When were the vessels constructed?
# Construction year against gross tonnage, point size mapped to carrying
# capacity, each point labelled with its vessel code.
longline %>%
  group_by(vessel_code) %>%
  distinct(vessel_code, vessel_length, vessel_width, gross_tonnage, carrying_capacity, year_constructed) %>%
  ggplot(mapping = aes(x = year_constructed, y = gross_tonnage, label = vessel_code)) +
  geom_point(mapping = aes(size = carrying_capacity)) +
  geom_text(size = 3, hjust = 0, nudge_x = 1, check_overlap = TRUE) +
  labs(size = "Carrying Capacity",
       x = "Year Constructed",
       y = "Gross Tonnage (tonnes)") +
  lims(x = c(1965, 2000))

  1. When were the vessels active?
# Convert the numeric trip dates to date-times, derive the trip duration, and
# turn the vessel code into a factor for plotting.
# NOTE(review): the raw values are assumed to be Windows Excel serial day
# numbers. Excel counts 1900-01-01 as day 1 and wrongly treats 1900 as a leap
# year, so the matching R origin is 1899-12-30; an origin of 1900-01-01 shifts
# every date two days late. Confirm against a trip with a known date.
excel_origin <- lubridate::as_datetime("1899-12-30")

longline <- longline %>%
  mutate(trip_return_date_clean = excel_origin + days(round(trip_return_date)),
         trip_leaving_date_clean = excel_origin + days(round(trip_leaving_date)),
         # The duration is a difference of two dates, so it does not depend on
         # the origin chosen above.
         trip_duration = trip_return_date_clean - trip_leaving_date_clean,
         vessel_code = as.factor(vessel_code))
# One row per vessel/trip; plot the leaving and return date of every trip on
# the vessel's own line.
longline %>%
  distinct(vessel_code, trip_number, .keep_all = TRUE) %>%
  ggplot() +
  geom_point(mapping = aes(y = vessel_code, x = trip_leaving_date_clean)) +
  geom_point(mapping = aes(y = vessel_code, x = trip_return_date_clean)) +
  labs(x = "Date", y = "Vessel Code")

Observations

  • We have a lot more data early on in the 1997-2001 period than in the second management period.
  1. How did the duration of trips vary by boat? by year?
# Distribution of trip durations per vessel, using one row per vessel/trip.
longline %>%
  distinct(vessel_code, trip_number, .keep_all = TRUE) %>%
  ggplot(mapping = aes(x = trip_duration,
                       y = as.factor(vessel_code),
                       fill = as.factor(vessel_code))) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  labs(x = "Trip Duration (Days)",
       y = "Vessel Code",
       fill = "Vessel Code")

What were the trip statistics (number of trips made and the minimum, mean, and maximum trip duration)?

# Per-vessel trip statistics.
# Step 1 collapses the haul-level rows to one row per vessel/trip, so each
# trip counts once in the mean duration; averaging over haul rows would weight
# every trip by its number of hauls.
# NOTE(review): assumes trip_duration is constant within a vessel/trip.
longline %>%
  group_by(vessel_code, trip_number) %>%
  summarise(trip_duration = first(trip_duration),
            hauls = max(haul_number),
            .groups = "drop") %>%
  # Step 2 summarises the trips of each vessel.
  group_by(vessel_code) %>%
  summarise(max_number_trips = max(trip_number),
            max_trip_duration = max(trip_duration),
            min_trip_duration = min(trip_duration),
            mean_trip_duration = round(mean(trip_duration), 2),
            max_hauls = max(hauls))
## # A tibble: 10 × 6
##    vessel_code max_number_trips max_trip_duration min_trip_duration
##    <fct>                  <dbl> <drtn>            <drtn>           
##  1 400061                    10  63 days          19 days          
##  2 400062                    11  60 days           8 days          
##  3 400065                     8  73 days          33 days          
##  4 400075                     8  88 days          19 days          
##  5 400081                    28  27 days           5 days          
##  6 400082                    29 161 days           5 days          
##  7 400088                     4  67 days          60 days          
##  8 400118                     8  91 days          24 days          
##  9 400133                    14   9 days           4 days          
## 10 400512                     8   6 days           5 days          
## # … with 2 more variables: mean_trip_duration <drtn>, max_hauls <dbl>

Double-check the workflow for this one to see if it makes sense.

  1. How did the haul weight vary for each ship over time?
# Distribution of haul catch per vessel, one panel per year; hauls with a
# catch of 0 or of 20000 kg and above are excluded.
longline %>%
  filter(total_capture_kg > 0, total_capture_kg < 20000) %>%
  ggplot(mapping = aes(x = total_capture_kg, y = vessel_code, fill = vessel_code)) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  labs(x = "Total Catch (1000 kg)",
       y = "Vessel Code",
       fill = "Vessel Code") +
  facet_wrap(~Year) +
  # Axis is in kg; the labels show thousands of kg.
  scale_x_continuous(breaks = c(0, 5000, 10000, 15000, 20000),
                     labels = c("0", "5", "10", "15", "20"))

Fishing effort in hauls over time

# Number of rows (hauls) per year, vessel and month, drawn on a polar month
# axis with one panel per year.
longline %>%
  count(Year, vessel_code, Month, name = "hauls") %>%
  ggplot(mapping = aes(x = Month, y = hauls,
                       group = vessel_code, color = vessel_code)) +
  geom_line() +
  scale_color_viridis_d() +
  coord_polar() +
  facet_wrap(~Year) +
  labs(color = "Vessel Code")

The same plot above without the coord_polar

# Number of rows (hauls) per year, vessel and month on a regular month axis,
# one panel per year.
longline %>%
  count(Year, vessel_code, Month, name = "hauls") %>%
  ggplot(mapping = aes(x = Month, y = hauls,
                       group = vessel_code, color = vessel_code)) +
  geom_line() +
  scale_color_viridis_d() +
  facet_wrap(~Year) +
  labs(color = "Vessel Code")

Create an interactive map of fishing haul locations

# Negate the start/end coordinates and build a per-haul grouping key.
# NOTE: this overwrites the coordinate columns in place, so running the chunk
# a second time flips the signs back.
longline <- mutate(longline,
                   startlondd = -startlondd,
                   startlatdd = -startlatdd,
                   endlondd = -endlondd,
                   endlatdd = -endlatdd,
                   group = as.factor(paste(vessel_code, trip_number, haul_number)))
# Colour scale for the Genypterus_blacodes column.
pal <- colorNumeric(palette = "Oranges", domain = longline$Genypterus_blacodes)

# One HTML hover label per row: the vessel code plus the Genypterus_blacodes value.
longline$labels <- lapply(
  sprintf("<strong>Vessel Code: %s</strong><br/>%g proportion of catch",
          longline$vessel_code, longline$Genypterus_blacodes),
  htmltools::HTML
)

# Interactive map of haul start positions, one toggleable layer per vessel.
# Each layer is added by the same function instead of ten copy-pasted
# addCircleMarkers() calls, and the layers control is part of the stored map
# object rather than only of the printed one.
longline_map_vessels <- c("400062", "400065", "400075", "400082", "400088",
                          "400118", "400061", "400081", "400133", "400512")

start_position_map <- purrr::reduce(
  longline_map_vessels,
  function(map, code) {
    # filter() drops rows whose vessel code is NA instead of keeping them as
    # all-NA rows.
    vessel_hauls <- dplyr::filter(longline, vessel_code == code)
    if (nrow(vessel_hauls) == 0) {
      return(map)
    }
    addCircleMarkers(map,
                     data = vessel_hauls,
                     lng = ~startlondd,
                     lat = ~startlatdd,
                     clusterOptions = NULL,
                     color = ~pal(Genypterus_blacodes),
                     label = vessel_hauls$labels,
                     radius = 2,
                     group = paste("Vessel", code))
  },
  .init = leaflet(data = longline) %>%
    addProviderTiles(providers$Esri.OceanBasemap)
) %>%
  addLegend(position = "bottomright",
            pal = pal,
            values = ~Genypterus_blacodes,
            title = "Proportion of catch that is pink cusk-eel",
            opacity = 1) %>%
  addLayersControl(
    overlayGroups = paste("Vessel", longline_map_vessels),
    options = layersControlOptions(collapsed = FALSE)
  )

start_position_map
# Interactive map of haul end positions for all vessels in a single layer,
# coloured by the Genypterus_blacodes value.
end_position_map <- leaflet(data = longline) %>%
  addProviderTiles(providers$Esri.OceanBasemap) %>%
  addCircleMarkers(lat = ~endlatdd,
                   lng = ~endlondd,
                   radius = 2,
                   color = ~pal(Genypterus_blacodes),
                   label = longline$labels,
                   clusterOptions = NULL) %>%
  addLegend(pal = pal,
            values = ~Genypterus_blacodes,
            title = "Proportion of catch that is pink cusk-eel",
            position = "bottomright",
            opacity = 1)

end_position_map

Catch Composition: What are the most common species?

What are the highest proportions of species across the different hauls?

# Reshape the species proportion columns to long form, keep the ten largest
# non-zero proportions of each vessel/trip, and list the species that appear.
longline %>%
  select(vessel_code, trip_number, leaving_port, landing_port, haul_number,
         Genypterus_blacodes, Merluccius_australis, total_catch,
         starts_with("P_")) %>%
  pivot_longer(cols = c(Merluccius_australis, starts_with("P_"), Genypterus_blacodes),
               names_to = "prop_species",
               values_to = "proportion") %>%
  filter(proportion > 0) %>%
  group_by(vessel_code, trip_number) %>%
  slice_max(order_by = proportion, n = 10) %>%
  ungroup() %>%
  select(prop_species) %>%
  distinct()
## # A tibble: 16 × 1
##    prop_species        
##    <chr>               
##  1 Genypterus_blacodes 
##  2 Merluccius_australis
##  3 P_trama             
##  4 P_granadero1        
##  5 P_lenguado.2        
##  6 P_reineta           
##  7 P_peje2             
##  8 P_varios            
##  9 P_brotula           
## 10 P_cojinoba.2        
## 11 P_merluza1          
## 12 P_merluza2          
## 13 P_coji              
## 14 P_granadero2        
## 15 P_bacalao.1         
## 16 P_raya56
  1. How often do species come up in the hauls? Presence/Absence
# Count the rows in which each species has a non-zero proportion, most
# frequent species first.
longline %>%
  select(vessel_code, trip_number, leaving_port, landing_port, haul_number,
         total_catch, Merluccius_australis, Genypterus_blacodes,
         starts_with("P_")) %>%
  pivot_longer(cols = c(Merluccius_australis, starts_with("P_"), Genypterus_blacodes),
               names_to = "prop_species",
               values_to = "proportion") %>%
  filter(proportion > 0) %>%
  count(prop_species, sort = TRUE)
## # A tibble: 77 × 2
##    prop_species             n
##    <chr>                <int>
##  1 Genypterus_blacodes  11329
##  2 Merluccius_australis 11075
##  3 P_bacalao.1           4787
##  4 P_cojinoba.2          2634
##  5 P_varios              2150
##  6 P_reineta             1533
##  7 P_brotula             1067
##  8 P_cab2                 410
##  9 P_merluza1             359
## 10 P_coji                 265
## # … with 67 more rows

How does the weight of congrio_dorado, merluza_comun, and merluza_del_sur vary in hauls?

# Weight distribution of four species per vessel. Hauls with a total catch of
# 0 or of 20000 kg and above, and the vessel "ISLA SOFIA", are excluded.
longline %>%
  filter(total_capture_kg > 0,
         total_capture_kg < 20000,
         vessel_name != "ISLA SOFIA") %>%
  select(vessel_code, trip_number, leaving_port, landing_port, haul_number,
         total_catch, merluza_comun, congrio_dorado, merluza_del_sur,
         bacalao_de_profundidad, Year) %>%
  pivot_longer(cols = c(merluza_comun, congrio_dorado, merluza_del_sur, bacalao_de_profundidad),
               names_to = "species",
               values_to = "weight_kg") %>%
  ggplot(mapping = aes(x = weight_kg, y = vessel_code, fill = species)) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  labs(x = "Weight (kg)",
       y = "Vessel Code",
       fill = "Species")

# Optional axis relabelling in thousands of kg (currently disabled):
#   + scale_x_continuous(breaks = c(0, 5000, 10000, 15000, 20000), labels = c("0", "5", "10", "15", "20"))

How complete is the fishing information?

# Type and missingness overview of the timing, depth and gear columns.
longline %>%
  select(all_of(c("J_time_line_set", "J_time_laying_line", "J_time_start_haul",
                  "J_time_end_haul", "c_laying_time", "c_soak_time",
                  "c_haul_time", "min_depth", "max_depth", "mid_depth",
                  "max_min", "hook_type", "number_of_hooks", "bait",
                  "hook_size"))) %>%
  visdat::vis_dat()

What is the range in the soak time?

# Soak-time distribution per vessel (same haul filter as the catch plots).
longline %>%
  filter(total_capture_kg > 0,
         total_capture_kg < 20000,
         vessel_name != "ISLA SOFIA") %>%
  ggplot(mapping = aes(y = vessel_code, x = c_soak_time)) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  labs(x = "Soak Time (hours)",
       y = "Vessel Code")

What is the range in haul time?

# Haul-time distribution per vessel (same haul filter as the catch plots).
longline %>%
  filter(total_capture_kg > 0,
         total_capture_kg < 20000,
         vessel_name != "ISLA SOFIA") %>%
  ggplot(mapping = aes(y = vessel_code, x = c_haul_time)) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  labs(x = "Haul Time (hours)",
       y = "Vessel Code")

What is the range in the min depth?

# Minimum-depth distribution per vessel (same haul filter as the catch plots).
longline %>%
  filter(total_capture_kg > 0,
         total_capture_kg < 20000,
         vessel_name != "ISLA SOFIA") %>%
  ggplot(mapping = aes(y = vessel_code, x = min_depth)) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  labs(x = "Min Depth (m)",
       y = "Vessel Code")

What is the range in the max depth?

# Maximum-depth distribution per vessel, one panel per man_zone value.
longline %>%
  filter(total_capture_kg > 0,
         total_capture_kg < 20000,
         vessel_name != "ISLA SOFIA") %>%
  ggplot(mapping = aes(y = vessel_code, x = max_depth)) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  labs(x = "Max Depth (m)",
       y = "Vessel Code") +
  facet_wrap(~man_zone)

What is the range in the mid depth?

# Mid-depth distribution per vessel, one panel per man_zone value.
longline %>%
  filter(total_capture_kg > 0,
         total_capture_kg < 20000,
         vessel_name != "ISLA SOFIA") %>%
  ggplot(mapping = aes(y = vessel_code, x = mid_depth)) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  labs(x = "Mid Depth (m)",
       y = "Vessel Code") +
  facet_wrap(~man_zone)

Looking at the hauls and filtering out only the groups that have the most:

# Distribution of catch proportions for a fixed list of species, per vessel,
# one panel per man_zone value; "ISLA SOFIA" is excluded.
longline %>%
  select(vessel_code, vessel_name, trip_number, leaving_port, landing_port,
         haul_number, total_catch, Merluccius_australis, Genypterus_blacodes,
         starts_with("P_"), man_zone) %>%
  pivot_longer(cols = c(Merluccius_australis, starts_with("P_"), Genypterus_blacodes),
               names_to = "prop_species",
               values_to = "proportion") %>%
  filter(proportion > 0,
         prop_species %in% c("Genypterus_blacodes", "Merluccius_australis",
                             "P_bacalao.1", "P_cojinoba.2", "P_reineta",
                             "P_brotula", "P_cab2", "P_merluza1", "P_coji"),
         vessel_name != "ISLA SOFIA") %>%
  ggplot(mapping = aes(x = proportion, y = vessel_code, fill = prop_species)) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  labs(x = "Proportion of Catch",
       y = "Vessel Code") +
  facet_wrap(~man_zone)

Looking at the haul proportions:

# Long-format table of the non-zero species proportions, grouped by
# vessel and trip.
longline %>%
  select(vessel_code, trip_number, leaving_port, landing_port, haul_number,
         Genypterus_blacodes, Merluccius_australis, total_catch,
         starts_with("P_")) %>%
  pivot_longer(cols = c(Merluccius_australis, starts_with("P_"), Genypterus_blacodes),
               names_to = "prop_species",
               values_to = "proportion") %>%
  filter(proportion > 0) %>%
  group_by(vessel_code, trip_number)
## # A tibble: 37,804 × 8
## # Groups:   vessel_code, trip_number [120]
##    vessel_code trip_number leaving_port landing_port haul_number total_catch
##    <fct>             <dbl>        <dbl>        <dbl>       <dbl>       <dbl>
##  1 400062                1           57           57           1        7269
##  2 400062                1           57           57           1        7269
##  3 400062                1           57           57           1        7269
##  4 400062                1           57           57           2        6458
##  5 400062                1           57           57           2        6458
##  6 400062                1           57           57           2        6458
##  7 400062                1           57           57           3        6881
##  8 400062                1           57           57           3        6881
##  9 400062                1           57           57           4        5333
## 10 400062                1           57           57           4        5333
## # … with 37,794 more rows, and 2 more variables: prop_species <chr>,
## #   proportion <dbl>

Part II: Palangre 87-97 Data

Exploring the vessels

  1. How many vessels are in the data set?

Vessel Code

# One row per distinct vessel code (CBU_ORAC).
distinct(palangre87_97, CBU_ORAC)
## # A tibble: 76 × 1
##    CBU_ORAC
##       <dbl>
##  1   400061
##  2   400062
##  3   400063
##  4   400064
##  5   400065
##  6   400066
##  7   400067
##  8   400068
##  9   400069
## 10   400070
## # … with 66 more rows
  1. When were the vessels active?
# Parse the day-month-year date strings and treat the vessel code as a factor.
palangre87_97 <- mutate(palangre87_97,
                        fecha = lubridate::dmy(fecha),
                        CBU_ORAC = as.factor(CBU_ORAC))
# Check the parsed date column.
str(palangre87_97[["fecha"]])
##  Date[1:35030], format: "1988-01-01" "1988-01-02" "1988-01-03" "1988-01-04" "1988-01-05" ...
# Density of record dates per vessel.
palangre87_97 %>%
  ggplot() +
  geom_density_ridges(mapping = aes(x = fecha, y = CBU_ORAC, fill = CBU_ORAC),
                      alpha = 0.75) +
  labs(x = "Date", y = "Vessel Code")

  1. How did the duration of trips vary by boat? by year?
# Distribution of trip durations per vessel.
# This data set has no trip_number / trip_duration / vessel_code columns; the
# vessel is CBU_ORAC and the trip is crucero. The duration is derived as the
# span between the first and last record date (fecha) of each vessel/trip.
# NOTE(review): assumes crucero identifies a trip within a vessel and that
# fecha has already been parsed to Date -- confirm. The span between first and
# last haul date is only a lower bound on the real time at sea.
palangre87_97 %>%
  drop_na(CBU_ORAC, fecha) %>%
  group_by(CBU_ORAC, crucero) %>%
  summarise(trip_duration = as.numeric(max(fecha) - min(fecha), units = "days"),
            .groups = "drop") %>%
  ggplot(aes(x = trip_duration, y = as.factor(CBU_ORAC), fill = as.factor(CBU_ORAC))) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  labs(y = "Vessel Code",
       x = "Trip Duration (Days)",
       fill = "Vessel Code")

What were the trip statistics (number of trips made and the minimum, mean, and maximum trip duration)?

# Per-vessel trip statistics.
# This data set has no trip_number / trip_duration / haul_number columns; the
# vessel is CBU_ORAC, the trip is crucero and the haul is lance.
# NOTE(review): assumes crucero identifies a trip within a vessel and that
# fecha has already been parsed to Date -- confirm.
palangre87_97 %>%
  drop_na(CBU_ORAC, fecha) %>%
  # Step 1: one row per vessel/trip, so each trip counts once in the mean.
  group_by(CBU_ORAC, crucero) %>%
  summarise(trip_duration = max(fecha) - min(fecha),
            hauls = max(lance, na.rm = TRUE),
            .groups = "drop") %>%
  # Step 2: summarise the trips of each vessel.
  group_by(CBU_ORAC) %>%
  summarise(max_number_trips = max(crucero),
            max_trip_duration = max(trip_duration),
            min_trip_duration = min(trip_duration),
            mean_trip_duration = round(mean(trip_duration), 2),
            max_hauls = max(hauls))

Double-check the workflow for this one to see if it makes sense.

  1. How did the haul weight vary for each ship over time?
# Haul weight distribution per vessel for years up to and including 1992,
# one panel per year; rows without a vessel code are dropped.
palangre87_97 %>%
  filter(Total.de.peso >= 0, Year <= 1992) %>%
  drop_na(CBU_ORAC) %>%
  ggplot(mapping = aes(x = Total.de.peso, y = CBU_ORAC, fill = CBU_ORAC)) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  labs(x = "Total Catch (1000 kg)",
       y = "Vessel Code",
       fill = "Vessel Code") +
  facet_wrap(~Year) +
  # Axis is in kg; the labels show thousands of kg.
  scale_x_continuous(breaks = c(0, 5000, 10000, 15000, 20000),
                     labels = c("0", "5", "10", "15", "20"))

# Haul weight distribution per vessel for years after 1992, one panel per
# year; only hauls with a strictly positive total weight are kept.
palangre87_97 %>%
  filter(Total.de.peso > 0, Year > 1992) %>%
  ggplot(mapping = aes(x = Total.de.peso, y = CBU_ORAC, fill = CBU_ORAC)) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  labs(x = "Total Catch (1000 kg)",
       y = "Vessel Code",
       fill = "Vessel Code") +
  facet_wrap(~Year) +
  # Axis is in kg; the labels show thousands of kg.
  scale_x_continuous(breaks = c(0, 5000, 10000, 15000, 20000),
                     labels = c("0", "5", "10", "15", "20"))

Fishing effort in hauls over time

# Number of rows (hauls) per year, vessel and month, drawn on a polar month
# axis with one panel per year.
palangre87_97 %>%
  count(Year, CBU_ORAC, Mes, name = "hauls") %>%
  ggplot(mapping = aes(x = Mes, y = hauls,
                       group = CBU_ORAC, color = CBU_ORAC)) +
  geom_line() +
  scale_color_viridis_d() +
  coord_polar() +
  facet_wrap(~Year) +
  labs(color = "Vessel Code")

The same plot above without the coord_polar

# Number of rows (hauls) per year, vessel and month on a regular month axis
# with quarterly breaks, one panel per year.
palangre87_97 %>%
  count(Year, CBU_ORAC, Mes, name = "hauls") %>%
  ggplot(mapping = aes(x = Mes, y = hauls,
                       group = CBU_ORAC, color = CBU_ORAC)) +
  geom_line() +
  scale_color_viridis_d() +
  facet_wrap(~Year) +
  labs(color = "Vessel Code") +
  scale_x_continuous(breaks = c(3, 6, 9, 12))

How much has each vessel fished?

# Total number of rows (hauls) per vessel as horizontal bars, ordered by count.
palangre87_97 %>%
  count(CBU_ORAC, name = "hauls") %>%
  ggplot(mapping = aes(y = hauls, x = fct_reorder(CBU_ORAC, hauls))) +
  geom_col() +
  scale_color_viridis_d() +
  coord_flip() +
  labs(x = "Vessel Code")

Create an interactive map of fishing haul locations

# Derive the map columns: negated, rescaled start coordinates, a per-haul
# grouping key, and the share of the haul's total weight taken by column X6.
# NOTE(review): dividing by 10000 treats the raw coordinates as decimal
# degrees x 10^4; if the source encodes degrees/minutes/seconds a proper
# conversion is needed -- confirm.
palangre87_97 <- palangre87_97 %>%
  mutate(startlatdd = -latcalini / 10000,
         startlondd = -loncalini / 10000,
         group = as.factor(paste(CBU_ORAC, crucero, lance)),
         # A zero (or negative) total weight would give Inf/NaN and distort the
         # colour-scale domain built from this column, so it becomes NA.
         P_X6 = if_else(Total.de.peso > 0, X6 / Total.de.peso, NA_real_))
# Colour scale for the P_X6 column.
pal <- colorNumeric(palette = "Oranges", domain = palangre87_97$P_X6)

# One HTML hover label per row: the vessel code plus the P_X6 value.
palangre87_97$labels <- lapply(
  sprintf("<strong>Vessel Code: %s</strong><br/>%g proportion of catch",
          palangre87_97$CBU_ORAC, palangre87_97$P_X6),
  htmltools::HTML
)

# Interactive map of haul start positions for a fixed list of vessels, one
# toggleable layer per vessel. Each layer is added by the same function
# instead of ten copy-pasted addCircleMarkers() calls, and the layers control
# is part of the stored map object rather than only of the printed one.
palangre_map_vessels <- c("400062", "400065", "400075", "400082", "400088",
                          "400118", "400061", "400081", "400133", "400512")

start_position_map <- purrr::reduce(
  palangre_map_vessels,
  function(map, code) {
    # filter() drops rows whose vessel code is NA; logical `[` subsetting
    # would keep them as all-NA rows.
    vessel_hauls <- dplyr::filter(palangre87_97, CBU_ORAC == code)
    # Vessels from the list that do not occur in this data set add no layer.
    if (nrow(vessel_hauls) == 0) {
      return(map)
    }
    addCircleMarkers(map,
                     data = vessel_hauls,
                     lng = ~startlondd,
                     lat = ~startlatdd,
                     clusterOptions = NULL,
                     color = ~pal(P_X6),
                     label = vessel_hauls$labels,
                     radius = 2,
                     group = paste("Vessel", code))
  },
  .init = leaflet(data = palangre87_97) %>%
    addProviderTiles(providers$Esri.OceanBasemap)
) %>%
  addLegend(position = "bottomright",
            pal = pal,
            values = ~P_X6,
            title = "Proportion of catch that is pink cusk-eel",
            opacity = 1) %>%
  addLayersControl(
    overlayGroups = paste("Vessel", palangre_map_vessels),
    options = layersControlOptions(collapsed = FALSE)
  )

start_position_map

Catch Composition: What are the most common species?

What are the highest proportions of species across the different hauls?

# Top five species by haul weight within every vessel-year, collapsed to the
# set of unique species codes.
palangre87_97 %>%
  select(
    CBU_ORAC, crucero, lance, Year, fecha, Total.de.peso,
    starts_with("X")
  ) %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "species_code",
    values_to = "peso"
  ) %>%
  mutate(prop = peso / Total.de.peso) %>%
  filter(peso > 0) %>%
  group_by(CBU_ORAC, Year) %>%
  # Heaviest rows first, so distinct() keeps each species' heaviest haul
  # inside its vessel-year group.
  arrange(desc(peso)) %>%
  distinct(species_code, .keep_all = TRUE) %>%
  slice_max(order_by = peso, n = 5) %>%
  ungroup() %>%
  distinct(species_code)
## # A tibble: 25 × 1
##    species_code
##    <chr>       
##  1 X6          
##  2 X2          
##  3 X200        
##  4 X99         
##  5 X25         
##  6 X5          
##  7 X8          
##  8 X4          
##  9 X37         
## 10 X27         
## # … with 15 more rows

Organized by proportion

# Top five species by share of the haul's total weight within every
# vessel-year, collapsed to the set of unique species codes.
palangre87_97 %>%
  select(
    CBU_ORAC, crucero, lance, Year, fecha, Total.de.peso,
    starts_with("X")
  ) %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "species_code",
    values_to = "peso"
  ) %>%
  mutate(prop = peso / Total.de.peso) %>%
  filter(peso > 0) %>%
  group_by(CBU_ORAC, Year) %>%
  # Largest proportions first, so distinct() keeps each species' highest
  # proportion inside its vessel-year group.
  arrange(desc(prop)) %>%
  distinct(species_code, .keep_all = TRUE) %>%
  slice_max(order_by = prop, n = 5) %>%
  ungroup() %>%
  distinct(species_code)
## # A tibble: 25 × 1
##    species_code
##    <chr>       
##  1 X2          
##  2 X6          
##  3 X99         
##  4 X25         
##  5 X200        
##  6 X5          
##  7 X999        
##  8 X8          
##  9 X4          
## 10 X37         
## # … with 15 more rows
  1. How often do species come up in the hauls? Presence/Absence
# Presence/absence: number of hauls in which each species was caught
# (peso > 0), most frequently caught species first.
palangre87_97 %>%
  select(
    CBU_ORAC, crucero, lance, Year, fecha, Total.de.peso,
    starts_with("X")
  ) %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "species_code",
    values_to = "peso"
  ) %>%
  mutate(prop = peso / Total.de.peso) %>%
  filter(peso > 0) %>%
  count(species_code, sort = TRUE)
## # A tibble: 29 × 2
##    species_code     n
##    <chr>        <int>
##  1 X2           25302
##  2 X6           24569
##  3 X37          14679
##  4 X99          10957
##  5 X5            9350
##  6 X4            7583
##  7 X8            3726
##  8 X56           2023
##  9 X81           1427
## 10 X96           1332
## # … with 19 more rows
# Number of hauls in which each species was caught (peso > 0), most frequent
# first. Computed once and reused for every threshold list below instead of
# re-pivoting the full data set four times.
species_counts <- palangre87_97 %>%
  select(
    CBU_ORAC, crucero, lance, Year, fecha, Total.de.peso,
    starts_with("X")
  ) %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "species_code",
    values_to = "peso"
  ) %>%
  filter(peso > 0) %>%
  count(species_code, sort = TRUE)

# Every species that was caught at least once.
species_list <- species_counts %>%
  select(species_code)

# Species caught in at least 100 hauls.
species_list_100 <- species_counts %>%
  filter(n >= 100) %>%
  select(species_code)

# Species caught in at least 1,000 hauls.
species_list_1000 <- species_counts %>%
  filter(n >= 1000) %>%
  select(species_code)

# Species caught in at least 10,000 hauls.
species_list_10000 <- species_counts %>%
  filter(n >= 10000) %>%
  select(species_code)
# Stacked area chart of yearly catch weight for the species listed in
# species_list_10000.
palangre87_97 %>%
  select(
    CBU_ORAC, crucero, lance, Year, fecha, Total.de.peso,
    starts_with("X")
  ) %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "species_code",
    values_to = "peso"
  ) %>%
  filter(peso > 0, species_code %in% species_list_10000$species_code) %>%
  group_by(Year, species_code) %>%
  # Drop the grouping explicitly so no partial grouping is carried into the
  # plot and no regrouping message is emitted.
  summarise(species_weight = sum(peso), .groups = "drop") %>%
  ggplot(aes(
    x = Year, y = species_weight,
    fill = species_code, group = species_code
  )) +
  geom_area(alpha = 0.7) +
  # Only `fill` is mapped, so the viridis palette must be a fill scale;
  # a colour scale would leave the default fill colours untouched.
  scale_fill_viridis_d() +
  labs(
    fill = "Species Code",
    y = "Species Weight (kg)"
  )

# Stacked area chart of yearly catch weight for species that appear in
# species_list_1000 but not in species_list_10000.
palangre87_97 %>%
  select(
    CBU_ORAC, crucero, lance, Year, fecha, Total.de.peso,
    starts_with("X")
  ) %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "species_code",
    values_to = "peso"
  ) %>%
  filter(
    peso > 0,
    species_code %in% species_list_1000$species_code,
    !(species_code %in% species_list_10000$species_code)
  ) %>%
  group_by(Year, species_code) %>%
  # Drop the grouping explicitly so no partial grouping is carried into the
  # plot and no regrouping message is emitted.
  summarise(species_weight = sum(peso), .groups = "drop") %>%
  ggplot(aes(
    x = Year, y = species_weight,
    fill = species_code, group = species_code
  )) +
  geom_area(alpha = 0.7) +
  # Only `fill` is mapped, so the viridis palette must be a fill scale;
  # a colour scale would leave the default fill colours untouched.
  scale_fill_viridis_d() +
  labs(
    fill = "Species Code",
    y = "Species Weight (kg)"
  )

How does the weight of congrio dorado and merluza_comun/merluza_del_sur vary across hauls?

# Ridgeline plot of per-haul weight for species codes X2 and X6, one panel
# per year, restricted to hauls with a total weight between 0 and 20,000
# (exclusive).
palangre87_97 %>%
  select(
    CBU_ORAC, crucero, lance, Year, fecha, Total.de.peso,
    starts_with("X")
  ) %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "species_code",
    values_to = "peso"
  ) %>%
  mutate(prop = peso / Total.de.peso) %>%
  filter(
    Total.de.peso > 0,
    Total.de.peso < 20000,
    species_code %in% c("X2", "X6")
  ) %>%
  ggplot(aes(
    x = peso,
    y = as.factor(species_code),
    fill = as.factor(species_code)
  )) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  # The y axis shows species codes, not vessels.
  labs(
    y = "Species Code",
    x = "Weight (1000 kgs)",
    fill = "Species"
  ) +
  facet_wrap(~Year) +
  # Tick labels are the break values divided by 1000.
  scale_x_continuous(
    breaks = c(0, 5000, 10000, 15000, 20000),
    labels = c("0", "5", "10", "15", "20")
  )

What is the range in the min depth?

# Ridgeline plot of minimum depth (zmin) per vessel, for hauls with a total
# weight between 0 and 20,000 (exclusive) and zmin below 2,500.
palangre87_97 %>%
  filter(
    Total.de.peso > 0,
    Total.de.peso < 20000,
    zmin < 2500
  ) %>%
  ggplot(mapping = aes(x = zmin, y = CBU_ORAC)) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  labs(
    x = "Min Depth (m)",
    y = "Vessel Code"
  )

What is the range in the max depth?

# Ridgeline plot of maximum depth (zmax) per vessel, for hauls with a total
# weight between 0 and 20,000 (exclusive) and zmax below 2,500.
palangre87_97 %>%
  filter(
    Total.de.peso > 0,
    Total.de.peso < 20000,
    zmax < 2500
  ) %>%
  ggplot(mapping = aes(x = zmax, y = CBU_ORAC)) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  # The x axis plots zmax, so it is labelled as maximum depth.
  labs(
    x = "Max Depth (m)",
    y = "Vessel Code"
  )

Looking at the hauls, keeping only the most frequently caught species (present in at least 10,000 hauls) and the vessels that appear in the longline vessel table:

# Ridgeline plot of each species' share of the haul weight per vessel,
# limited to species in species_list_10000 and to vessels whose code appears
# in longline$vessel_code.
palangre87_97 %>%
  select(
    CBU_ORAC, crucero, lance, Year, fecha, Total.de.peso,
    starts_with("X")
  ) %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "species_code",
    values_to = "peso"
  ) %>%
  mutate(prop = peso / Total.de.peso) %>%
  filter(
    peso > 0,
    species_code %in% species_list_10000$species_code,
    CBU_ORAC %in% unique(longline$vessel_code)
  ) %>%
  ggplot(aes(x = prop, y = CBU_ORAC, fill = species_code)) +
  ggridges::geom_density_ridges(alpha = 0.5) +
  # Only `fill` is mapped, so the viridis palette must be a fill scale;
  # a colour scale would leave the default fill colours untouched.
  scale_fill_viridis_d() +
  labs(
    y = "Vessel Code",
    x = "Proportion of Catch",
    fill = "Species Code"
  )

Part III: palangre 1997-2018

Exploring the vessels

  1. How many vessels are in the data set?

Vessel Code

# One row per vessel code present in the 1997-2018 data.
distinct(palangre97_18, CBU_ORAC)
## # A tibble: 60 × 1
##    CBU_ORAC
##       <dbl>
##  1   941225
##  2   400075
##  3   942434
##  4   940391
##  5   940239
##  6   941612
##  7   400065
##  8   400061
##  9   941096
## 10   400099
## # … with 50 more rows
  1. When were the vessels active?
# Parse `fecha` as day-month-year dates, store the vessel code as a factor,
# and keep only records whose Year lies strictly between 1991 and 2030.
# The conversions run before the filter, so the factor levels are built from
# all vessel codes, including any that only occur in rows removed afterwards.
# NOTE(review): palangre97_18 is overwritten in place; re-running this chunk
# would pass already-parsed Date values to dmy() -- confirm before re-running.
palangre97_18 <- palangre97_18 %>%
  mutate(fecha = lubridate::dmy(fecha),
         CBU_ORAC = as.factor(CBU_ORAC)) %>%
  filter(Year < 2030, Year > 1991)
# Inspect the parsed date column.
str(palangre97_18$fecha)
##  Date[1:62176], format: "2018-07-13" "2018-07-13" "2018-07-13" "2018-07-12" "2018-07-11" ...
# Ridgeline plot of haul dates per vessel, showing when each vessel was
# active.
palangre97_18 %>%
  ggplot(aes(x = fecha, y = CBU_ORAC, fill = CBU_ORAC)) +
  geom_density_ridges(alpha = 0.75) +
  labs(
    x = "Date",
    y = "Vessel Code"
  )

  1. How did the duration of trips vary by boat? by year?
# Ridgeline plot of trip duration per vessel, using one row per
# (CBU_ORAC, trip_number) combination.
# NOTE(review): trip_number, trip_duration and vessel_code are not created
# for palangre97_18 in the visible part of this analysis -- confirm these
# columns exist before running.
# NOTE(review): rows are de-duplicated by CBU_ORAC but plotted by vessel_code;
# verify that both identify the same vessels.
palangre97_18 %>%
  distinct(CBU_ORAC, trip_number, .keep_all = TRUE) %>%
  ggplot(aes(x = trip_duration, y = as.factor(vessel_code), fill = as.factor(vessel_code))) + 
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  labs(y = "Vessel Code",
       x = "Trip Duration (Days)", 
       fill = "Vessel Code")

What were the trip statistics (number of trips made and the minimum, mean, and maximum trip duration)?

# Per-vessel trip summary: largest trip number, maximum / minimum / mean trip
# duration (mean rounded to 2 decimals) and largest haul number.
# NOTE(review): vessel_code, trip_number, trip_duration and haul_number are
# not created for palangre97_18 in the visible part of this analysis --
# confirm these columns exist before running.
palangre97_18 %>%
  group_by(vessel_code) %>%
  select(vessel_code, trip_number, trip_duration, haul_number) %>%
  # Keeps one row per vessel / trip / haul, so the mean below is taken over
  # hauls rather than over trips.
  distinct(vessel_code, trip_number, haul_number, .keep_all = TRUE) %>%
  # max(trip_number) is reported as the number of trips.
  # NOTE(review): this presumably relies on trips being numbered 1..n per
  # vessel -- verify.
  summarise(max_number_trips = max(trip_number),
            max_trip_duration = max(trip_duration),
            min_trip_duration = min(trip_duration),
            mean_trip_duration = round(mean(trip_duration), 2),
            max_hauls = max(haul_number))

Double-check the workflow for this one to make sure it makes sense.

  1. How did the haul weight vary for each ship over time?
# Ridgeline plot of total haul weight per vessel, one panel per year, for
# years up to and including 2002 (hauls with a non-negative total weight and
# a known vessel code).
palangre97_18 %>%
  drop_na(CBU_ORAC) %>%
  filter(Year <= 2002, Total.de.peso >= 0) %>%
  ggplot(aes(x = Total.de.peso, y = CBU_ORAC, fill = CBU_ORAC)) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  # Tick labels are the break values divided by 1000.
  scale_x_continuous(
    breaks = c(0, 5000, 10000, 15000, 20000),
    labels = c("0", "5", "10", "15", "20")
  ) +
  facet_wrap(~Year) +
  labs(
    x = "Total Catch (1000 kg)",
    y = "Vessel Code",
    fill = "Vessel Code"
  )

# Ridgeline plot of total haul weight per vessel, one panel per year, for
# years after 2002 (hauls with a positive total weight).
palangre97_18 %>%
  filter(Year > 2002, Total.de.peso > 0) %>%
  ggplot(aes(x = Total.de.peso, y = CBU_ORAC, fill = CBU_ORAC)) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  # Tick labels are the break values divided by 1000.
  scale_x_continuous(
    breaks = c(0, 5000, 10000, 15000, 20000),
    labels = c("0", "5", "10", "15", "20")
  ) +
  facet_wrap(~Year) +
  labs(
    x = "Total Catch (1000 kg)",
    y = "Vessel Code",
    fill = "Vessel Code"
  )

Fishing effort in hauls over time

# Number of hauls per vessel and month, drawn as one line per vessel with a
# panel for every year.
palangre97_18 %>%
  count(Year, CBU_ORAC, Mes, name = "hauls") %>%
  ggplot(aes(x = Mes, y = hauls, group = CBU_ORAC, color = CBU_ORAC)) +
  geom_line() +
  scale_color_viridis_d() +
  scale_x_continuous(breaks = c(3, 6, 9, 12)) +
  facet_wrap(~Year) +
  labs(color = "Vessel Code")

How much has each vessel fished?

# Horizontal bar chart of the total number of hauls per vessel, with vessels
# ordered by haul count.
palangre97_18 %>%
  count(CBU_ORAC, name = "hauls") %>%
  ggplot(aes(x = fct_reorder(CBU_ORAC, hauls), y = hauls)) +
  geom_col() +
  scale_color_viridis_d() +
  coord_flip() +
  labs(x = "Vessel Code")

Create an interactive map of fishing haul locations

# Columns used by the interactive map: sign-flipped, rescaled start
# coordinates, a combined vessel / fishery / haul grouping factor, and the
# share of each haul's weight recorded under species code X6.
palangre97_18 <- palangre97_18 %>%
  mutate(
    startlatdd = -latcalini / 10000,
    startlondd = -loncalini / 10000,
    group = as.factor(paste(CBU_ORAC, pesqueria, lance)),
    P_X6 = X6 / Total.de.peso
  )

# Continuous orange palette spanning the observed X6 proportions.
pal <- colorNumeric(palette = "Oranges", domain = palangre97_18$P_X6)

# Hover labels, wrapped as HTML so the markup is rendered rather than shown
# as text.
palangre97_18$labels <- lapply(
  sprintf("<strong>Vessel Code: %s</strong><br/>%g proportion of catch",
          palangre97_18$CBU_ORAC, palangre97_18$P_X6),
  htmltools::HTML
)

# Vessels shown on the map, each as its own toggleable layer. The order is
# the order in which the layers are listed in the layers control.
vessel_codes <- c(
  "400065", "400062", "400075", "400082", "400088",
  "400118", "400061", "400081", "400133", "400512"
)

# Adds the haul start positions of one vessel to `map` as the layer group
# "Vessel <code>", coloured by the X6 proportion via `pal`.
add_vessel_markers <- function(map, code) {
  # %in% never returns NA (unlike ==), so hauls with a missing vessel code
  # are left out instead of turning into all-NA rows.
  vessel_hauls <- palangre97_18[palangre97_18$CBU_ORAC %in% code, ]
  addCircleMarkers(
    map,
    data = vessel_hauls,
    lng = ~startlondd,
    lat = ~startlatdd,
    clusterOptions = NULL,
    color = ~pal(P_X6),
    label = vessel_hauls$labels,
    radius = 2,
    group = paste("Vessel", code)
  )
}

base_map <- leaflet(data = palangre97_18) %>%
  addProviderTiles(providers$Esri.OceanBasemap)

# Fold the vessels onto the base map one layer at a time, then add the legend.
start_position_map <- Reduce(add_vessel_markers, vessel_codes, base_map) %>%
  addLegend(
    position = "bottomright",
    pal = pal,
    values = ~P_X6,
    title = "Proportion of catch that is pink cusk-eel",
    opacity = 1
  )

# Display the map with a layer toggle. As before, the control is added only
# to the displayed map; start_position_map itself is stored without it.
start_position_map %>%
  addLayersControl(
    overlayGroups = paste("Vessel", vessel_codes),
    options = layersControlOptions(collapsed = FALSE)
  )

Catch Composition: What are the most common species?

What are the highest proportions of species across the different hauls?

# Top five species by haul weight within every vessel-year, collapsed to the
# set of unique species codes.
palangre97_18 %>%
  select(
    CBU_ORAC, lance, Year, fecha, Total.de.peso,
    starts_with("X")
  ) %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "species_code",
    values_to = "peso"
  ) %>%
  mutate(prop = peso / Total.de.peso) %>%
  filter(peso > 0) %>%
  group_by(CBU_ORAC, Year) %>%
  # Heaviest rows first, so distinct() keeps each species' heaviest haul
  # inside its vessel-year group.
  arrange(desc(peso)) %>%
  distinct(species_code, .keep_all = TRUE) %>%
  slice_max(order_by = peso, n = 5) %>%
  ungroup() %>%
  distinct(species_code)
## # A tibble: 35 × 1
##    species_code
##    <chr>       
##  1 X6          
##  2 X2          
##  3 X37         
##  4 X5          
##  5 X4          
##  6 X27         
##  7 X96         
##  8 X1          
##  9 X99         
## 10 X120        
## # … with 25 more rows

Organized by proportion

# Top five species by share of the haul's total weight within every
# vessel-year, collapsed to the set of unique species codes.
palangre97_18 %>%
  select(
    CBU_ORAC, lance, Year, fecha, Total.de.peso,
    starts_with("X")
  ) %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "species_code",
    values_to = "peso"
  ) %>%
  mutate(prop = peso / Total.de.peso) %>%
  filter(peso > 0) %>%
  group_by(CBU_ORAC, Year) %>%
  # Largest proportions first, so distinct() keeps each species' highest
  # proportion inside its vessel-year group.
  arrange(desc(prop)) %>%
  distinct(species_code, .keep_all = TRUE) %>%
  slice_max(order_by = prop, n = 5) %>%
  ungroup() %>%
  distinct(species_code)
## # A tibble: 36 × 1
##    species_code
##    <chr>       
##  1 X37         
##  2 X6          
##  3 X2          
##  4 X5          
##  5 X4          
##  6 X27         
##  7 X96         
##  8 X1          
##  9 X99         
## 10 X24         
## # … with 26 more rows
  1. How often do species come up in the hauls? Presence/Absence
# Presence/absence: number of hauls in which each species was caught
# (peso > 0), most frequently caught species first.
palangre97_18 %>%
  select(
    CBU_ORAC, lance, Year, fecha, Total.de.peso,
    starts_with("X")
  ) %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "species_code",
    values_to = "peso"
  ) %>%
  mutate(prop = peso / Total.de.peso) %>%
  filter(peso > 0) %>%
  count(species_code, sort = TRUE)
## # A tibble: 54 × 2
##    species_code     n
##    <chr>        <int>
##  1 X37          47558
##  2 X6           18157
##  3 X2           18056
##  4 X99           3411
##  5 X5            3391
##  6 X96           3167
##  7 X27           2981
##  8 X17           2789
##  9 X4            1896
## 10 X16           1861
## # … with 44 more rows
# Number of hauls in which each species was caught (peso > 0), most frequent
# first. Computed once and reused for every threshold list below instead of
# re-pivoting the full data set four times.
species_counts <- palangre97_18 %>%
  select(
    CBU_ORAC, lance, Year, fecha, Total.de.peso,
    starts_with("X")
  ) %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "species_code",
    values_to = "peso"
  ) %>%
  filter(peso > 0) %>%
  count(species_code, sort = TRUE)

# Every species that was caught at least once.
species_list <- species_counts %>%
  select(species_code)

# Species caught in at least 100 hauls.
species_list_100 <- species_counts %>%
  filter(n >= 100) %>%
  select(species_code)

# Species caught in at least 1,000 hauls.
species_list_1000 <- species_counts %>%
  filter(n >= 1000) %>%
  select(species_code)

# Species caught in at least 10,000 hauls.
species_list_10000 <- species_counts %>%
  filter(n >= 10000) %>%
  select(species_code)
# Stacked area chart of yearly catch weight for the species listed in
# species_list_10000.
palangre97_18 %>%
  select(
    CBU_ORAC, lance, Year, fecha, Total.de.peso,
    starts_with("X")
  ) %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "species_code",
    values_to = "peso"
  ) %>%
  filter(peso > 0, species_code %in% species_list_10000$species_code) %>%
  group_by(Year, species_code) %>%
  # Drop the grouping explicitly so no partial grouping is carried into the
  # plot and no regrouping message is emitted.
  summarise(species_weight = sum(peso), .groups = "drop") %>%
  ggplot(aes(
    x = Year, y = species_weight,
    fill = species_code, group = species_code
  )) +
  geom_area(alpha = 0.7) +
  # Only `fill` is mapped, so the viridis palette must be a fill scale;
  # a colour scale would leave the default fill colours untouched.
  scale_fill_viridis_d() +
  labs(
    fill = "Species Code",
    y = "Species Weight (kg)"
  )

# Stacked area chart of yearly catch weight for species that appear in
# species_list_1000 but not in species_list_10000.
palangre97_18 %>%
  select(
    CBU_ORAC, lance, Year, fecha, Total.de.peso,
    starts_with("X")
  ) %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "species_code",
    values_to = "peso"
  ) %>%
  filter(
    peso > 0,
    species_code %in% species_list_1000$species_code,
    !(species_code %in% species_list_10000$species_code)
  ) %>%
  group_by(Year, species_code) %>%
  # Drop the grouping explicitly so no partial grouping is carried into the
  # plot and no regrouping message is emitted.
  summarise(species_weight = sum(peso), .groups = "drop") %>%
  ggplot(aes(
    x = Year, y = species_weight,
    fill = species_code, group = species_code
  )) +
  geom_area(alpha = 0.7) +
  # Only `fill` is mapped, so the viridis palette must be a fill scale;
  # a colour scale would leave the default fill colours untouched.
  scale_fill_viridis_d() +
  labs(
    fill = "Species Code",
    y = "Species Weight (kg)"
  )

How does the weight of congrio dorado and merluza_comun/merluza_del_sur vary across hauls?

# Ridgeline plot of per-haul weight for species codes X2 and X6, one panel
# per year, restricted to hauls with a total weight between 0 and 20,000
# (exclusive).
palangre97_18 %>%
  select(
    CBU_ORAC, lance, Year, fecha, Total.de.peso,
    starts_with("X")
  ) %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "species_code",
    values_to = "peso"
  ) %>%
  mutate(prop = peso / Total.de.peso) %>%
  filter(
    Total.de.peso > 0,
    Total.de.peso < 20000,
    species_code %in% c("X2", "X6")
  ) %>%
  ggplot(aes(
    x = peso,
    y = as.factor(species_code),
    fill = as.factor(species_code)
  )) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  # The y axis shows species codes, not vessels.
  labs(
    y = "Species Code",
    x = "Weight (1000 kgs)",
    fill = "Species"
  ) +
  facet_wrap(~Year) +
  # Tick labels are the break values divided by 1000.
  scale_x_continuous(
    breaks = c(0, 5000, 10000, 15000, 20000),
    labels = c("0", "5", "10", "15", "20")
  )

What is the range in the min depth?

# Ridgeline plot of minimum depth (zmin) per vessel, for hauls with a total
# weight between 0 and 20,000 (exclusive) and zmin below 2,500.
palangre97_18 %>%
  filter(
    Total.de.peso > 0,
    Total.de.peso < 20000,
    zmin < 2500
  ) %>%
  ggplot(mapping = aes(x = zmin, y = CBU_ORAC)) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  labs(
    x = "Min Depth (m)",
    y = "Vessel Code"
  )

What is the range in the max depth?

# Ridgeline plot of maximum depth (zmax) per vessel, for hauls with a total
# weight between 0 and 20,000 (exclusive) and zmax below 2,500.
palangre97_18 %>%
  filter(
    Total.de.peso > 0,
    Total.de.peso < 20000,
    zmax < 2500
  ) %>%
  ggplot(mapping = aes(x = zmax, y = CBU_ORAC)) +
  geom_density_ridges(alpha = 0.75) +
  scale_fill_viridis_d() +
  # The x axis plots zmax, so it is labelled as maximum depth.
  labs(
    x = "Max Depth (m)",
    y = "Vessel Code"
  )

Looking at the hauls, keeping only the most frequently caught species (present in at least 10,000 hauls) and the vessels that appear in the longline vessel table:

# Ridgeline plot of each species' share of the haul weight per vessel,
# limited to species in species_list_10000 and to vessels whose code appears
# in longline$vessel_code.
palangre97_18 %>%
  select(
    CBU_ORAC, lance, Year, fecha, Total.de.peso,
    starts_with("X")
  ) %>%
  pivot_longer(
    cols = starts_with("X"),
    names_to = "species_code",
    values_to = "peso"
  ) %>%
  mutate(prop = peso / Total.de.peso) %>%
  filter(
    peso > 0,
    species_code %in% species_list_10000$species_code,
    CBU_ORAC %in% unique(longline$vessel_code)
  ) %>%
  ggplot(aes(x = prop, y = CBU_ORAC, fill = species_code)) +
  ggridges::geom_density_ridges(alpha = 0.5) +
  # Only `fill` is mapped, so the viridis palette must be a fill scale;
  # a colour scale would leave the default fill colours untouched.
  scale_fill_viridis_d() +
  labs(
    y = "Vessel Code",
    x = "Proportion of Catch",
    fill = "Species Code"
  )